setwd("C:/Users/danie/OneDrive/Hertie Spring 20/R Geospatial Data")
library(tidyverse)
library(reshape2)
library(tmap)
library(raster)
library(RColorBrewer)
library(classInt)
library(viridisLite)
library(sp)
library(ggmap)
library(sf)
library(rgdal)
library(lubridate)
df<-read.csv("Emergency_Response_Incidents.csv")
df_s_all<-df[complete.cases(df),] # Obtaining only complete cases
df_s<-df_s_all[,]
str(df_s)
'data.frame': 7797 obs. of 7 variables:
$ Incident.Type: Factor w/ 443 levels "Administration-1 PP Activation",..: 430 308 14 430 32 430 32 33 269 407 ...
$ Location : Factor w/ 6193 levels "","01 Penn Plaza",..: 831 4853 5835 1621 2105 4530 2133 3952 3863 5593 ...
$ Borough : Factor w/ 62 levels "Astoria","Bergen",..: 46 28 46 46 28 28 28 37 28 59 ...
$ Creation.Date: Factor w/ 7101 levels "01/01/2013 06:45:29 AM",..: 347 5803 6108 6485 6349 6499 6729 6449 6401 6417 ...
$ Closed.Date : Factor w/ 7100 levels "","01/01/2012 06:05:00 AM",..: 1 1 1 1 1 1 1 6418 1 1 ...
$ Latitude : num 40.7 40.7 40.7 40.7 40.7 ...
$ Longitude : num -73.8 -74 -73.8 -73.8 -74 ...
Note: GMap from Google Static Maps API; New York City borough shapefile from: https://data.cityofnewyork.us/City-Government/Borough-Boundaries/tqmj-j8zm
### Map file of NYCwith preset key from Google API ###
#Note: OSM is not available
map_nyc <- get_map(location = c(lon=df_s$Longitude[100],lat = df_s$Latitude[100]), zoom = 10)
Source : https://maps.googleapis.com/maps/api/staticmap?center=40.714422,-74.006076&zoom=10&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
### NYC Neighbourhood Data
shp_nyc_NTA<- st_read("geo_export_e52111a7-ba1f-48e5-876d-c912e4640c4b.shp")
Reading layer `geo_export_e52111a7-ba1f-48e5-876d-c912e4640c4b' from data source `C:\Users\danie\OneDrive\Hertie Spring 20\R Geospatial Data\geo_export_e52111a7-ba1f-48e5-876d-c912e4640c4b.shp' using driver `ESRI Shapefile'
Simple feature collection with 195 features and 7 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
epsg (SRID): 4326
proj4string: +proj=longlat +ellps=WGS84 +no_defs
#summary(shp_nyc_bor)
### NYC Boroughs Shapefile
shp_nyc_bor<-st_read("geo_export_5912ccc8-6f97-4ae2-bf31-ce6d35ef2fb4.shp")
Reading layer `geo_export_5912ccc8-6f97-4ae2-bf31-ce6d35ef2fb4' from data source `C:\Users\danie\OneDrive\Hertie Spring 20\R Geospatial Data\geo_export_5912ccc8-6f97-4ae2-bf31-ce6d35ef2fb4.shp' using driver `ESRI Shapefile'
Simple feature collection with 5 features and 4 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
epsg (SRID): 4326
proj4string: +proj=longlat +ellps=WGS84 +no_defs
### Confirming projection system of shapefile; default GMaps CRS is WGS84 ###
st_crs(shp_nyc_bor)
Coordinate Reference System:
EPSG: 4326
proj4string: "+proj=longlat +ellps=WGS84 +no_defs"
st_crs(shp_nyc_NTA)
Coordinate Reference System:
EPSG: 4326
proj4string: "+proj=longlat +ellps=WGS84 +no_defs"
### Creating SF file for database
emr_nyc<-st_as_sf(df_s,coords=c("Longitude","Latitude"))
### Checks for consistent CRS
st_crs(emr_nyc)
Coordinate Reference System: NA
st_crs(emr_nyc)<-st_crs(shp_nyc_NTA)
st_crs(shp_nyc_NTA)==st_crs(emr_nyc)
[1] TRUE
### Initial Plot
plot(st_geometry(shp_nyc_NTA))
plot(emr_nyc, add = TRUE, col = "yellow")
ignoring all but the first attribute
The plot below shows the density of emergency response incidents per 1 million sq km. I begin by using the function \(st_intersects()\) and then plot based on each borough. Of the five boroughs, Manhattan has the highest incident density rate, which is not surprising. However, It seems like Manhattan’s high rate has eclipsed that of the other boroughs. We can change this by specifying the breaks in the data visualized.
### Calculating Statistics for Borough-Level
count <- st_intersects(shp_nyc_bor,emr_nyc)
although coordinates are longitude/latitude, st_intersects assumes that they are planar
#Counting number of occurences
n_bor <-lengths(count)
#Calculating Areas
area_bor<-unclass(st_area(shp_nyc_bor))
#Calculating Density
den_bor <-n_bor/area_bor
comb_bor <- shp_nyc_bor %>%
mutate(n = n_bor, den = den_bor, area = area_bor, den1mil = den_bor*1e6)
head(shp_nyc_NTA)
Simple feature collection with 6 features and 7 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -74.00736 ymin: 40.58731 xmax: -73.75047 ymax: 40.8236
epsg (SRID): 4326
proj4string: +proj=longlat +ellps=WGS84 +no_defs
boro_code boro_name county_fip ntacode ntaname shape_area shape_leng
1 3 Brooklyn 047 BK88 Borough Park 54005019 39247.23
2 4 Queens 081 QN51 Murray Hill 52488278 33266.90
3 4 Queens 081 QN27 East Elmhurst 19726846 19816.71
4 4 Queens 081 QN07 Hollis 22887773 20976.34
5 1 Manhattan 061 MN06 Manhattanville 10647078 17040.69
6 3 Brooklyn 047 BK25 Homecrest 29991967 27514.02
geometry
1 MULTIPOLYGON (((-73.97605 4...
2 MULTIPOLYGON (((-73.80379 4...
3 MULTIPOLYGON (((-73.8611 40...
4 MULTIPOLYGON (((-73.75726 4...
5 MULTIPOLYGON (((-73.94608 4...
6 MULTIPOLYGON (((-73.95859 4...
### Plotting
maps_bor<- mapview(shp_nyc_NTA,zcol ="ntaname",
layer.name = "New York Neighborhoods",
lwd = 1,
color = "black",
alpha.regions = 0, hide = TRUE,legend=FALSE)+
mapview(comb_bor, zcol = "den1mil",
col.regions = sf.colors(10),
alpha.regions = 0.8,
layer.name = "Incidents per 1 million sq km",
at = c(2,5,10,3060))
maps_bor
NA
NA
This map is more useful as we can see finer detail abotu the relative number of incidents, controlling for the area of neighborhoods. Below, we observe that SoHo-TriBeCa-Civic Center-Little Italy has the highest number of incidents. Interestingly, we also see that Kew Gardens in Queens has an unusually high emergency incident rate vis-a-vis other neighborhoods in its surroundings and in Queens.
### Finding out density of incidents per neighborhood
count <- st_intersects(shp_nyc_NTA,emr_nyc)
although coordinates are longitude/latitude, st_intersects assumes that they are planar
#Counting number of occurences
n_res <-lengths(count)
#Calculating Areas
area_NTA<-unclass(st_area(shp_nyc_NTA))
#Calculating Density
den_NTA <-n_res/area_NTA
comb_NTA <- shp_nyc_NTA %>%
mutate(n = n_res, den = den_NTA, area = area_NTA, den1mil = den_NTA*1e6)
### Plotting
# Statistical Viewing
ggplot(comb_NTA)+
geom_histogram(aes(x = n))
ggplot(comb_NTA, aes(x = n, y = area))+
geom_point()+
stat_smooth()
# Using Map View
library(mapview)
maps_NTA<- mapview(shp_nyc_bor,zcol ="boro_name",
layer.name = "New York Boroughs",
lwd = 5,
color = "black",
alpha.regions = 0)+
mapview(comb_NTA, zcol = "den1mil",
col.regions = sf.colors(10),
alpha.regions = 0.8,
layer.name = "Incidents per 1 million sq km",
at = seq(0,200,20))
maps_NTA
# Using Thematic Maps
yellowred<-brewer.pal(n=5,"YlOrRd")
tmap_mode("view")
tmap mode set to interactive viewing
tm_NTA<-tm_shape(comb_NTA)+
tm_borders()+
tm_fill(col = "n", title = "Number of Incidents",
palette = yellowred, style = "quantile",
alpha = 0.6)+
tm_shape(shp_nyc_bor)+
tm_borders(col = "grey60",lwd = 2)+
tm_basemap
tm_NTA